Code
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)df <- read.csv("../data/rawdata_participants.csv") |>
mutate(across(everything(), ~ifelse(.x == "", NA, .x))) |>
mutate(Experimenter = case_when(
Language=="English" & Experimenter %in% c("reddit7", "reddit8", "reddit1", "reddit2", "reddit5") ~ "Reddit (other)",
.default = Experimenter
))
dftask <- read.csv("../data/rawdata_task.csv") |>
full_join(
df[c("Participant", "Sex", "SexualOrientation")],
by = join_by(Participant)
)The initial sample consisted of 556 participants (Mean age = 32.7, SD = 12.5, range: [18, 80]; Sex: 32.2% females, 66.9% males, 0.9% other; Education: Bachelor, 36.87%; Doctorate, 6.29%; High School, 32.55%; Master, 21.76%; Other, 1.98%; Primary School, 0.54%; Country: 21.58% USA, 18.53% UK, 15.29% Colombia, 11.33% France, 33.27% other).
# Create Sexual "relevance" variable (Relevant, irrelevant, non-erotic)
dftask <- dftask |>
mutate(Relevance = case_when(
Type == "Non-erotic" ~ "Non-erotic",
Sex == "Male" & SexualOrientation == "Heterosexual" & Category == "Female" ~ "Relevant",
Sex == "Female" & SexualOrientation == "Heterosexual" & Category == "Male" ~ "Relevant",
Sex == "Male" & SexualOrientation == "Homosexual" & Category == "Male" ~ "Relevant",
Sex == "Female" & SexualOrientation == "Homosexual" & Category == "Female" ~ "Relevant",
# TODO: what to do with "Other"?
SexualOrientation %in% c("Bisexual", "Other") & Category %in% c("Male", "Female") ~ "Relevant",
.default = "Irrelevant"
)) plot_recruitement <- function(df) {
# Consecutive count of participants per day (as area)
df |>
mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |>
group_by(Date, Language, Experimenter) |>
summarize(N = n()) |>
ungroup() |>
# https://bocoup.com/blog/padding-time-series-with-r
complete(Date, Language, Experimenter, fill = list(N = 0)) |>
group_by(Language, Experimenter) |>
mutate(N = cumsum(N)) |>
ggplot(aes(x = Date, y = N)) +
geom_area(aes(fill=Experimenter)) +
scale_y_continuous(expand = c(0, 0)) +
labs(
title = "Recruitment History",
x = "Date",
y = "Total Number of Participants"
) +
see::theme_modern()
}
plot_recruitement(df) +
facet_wrap(~Language, nrow=5, scales = "free_y")# Table
table_recruitment <- function(df) {
summarize(df, N = n(), .by=c("Language", "Experimenter")) |>
arrange(desc(N)) |>
gt::gt() |>
gt::opt_stylize() |>
gt::opt_interactive(use_compact_mode = TRUE) |>
gt::tab_header("Number of participants per recruitment source")
}
table_recruitment(df)plot_recruitement(filter(df, Language == "English"))table_recruitment(filter(df, Language == "English"))plot_recruitement(filter(df, Language == "French"))table_recruitment(filter(df, Language == "French"))plot_recruitement(filter(df, Language == "Italian"))table_recruitment(filter(df, Language == "Italian"))plot_recruitement(filter(df, Language == "Colombian"))table_recruitment(filter(df, Language == "Colombian"))The majority of participants found the study to be a “fun” experience. Interestingly, reports of “fun” were significantly associated with finding at least some stimuli arousing. Conversely, reporting “no feelings” was associated with finding the experiment “boring”.
df |>
select(starts_with("Feedback"), -Feedback_Comments) |>
pivot_longer(everything(), names_to = "Question", values_to = "Answer") |>
group_by(Question, Answer) |>
summarise(prop = n()/nrow(df), .groups = 'drop') |>
complete(Question, Answer, fill = list(prop = 0)) |>
filter(Answer == "True") |>
mutate(Question = str_remove(Question, "Feedback_"),
Question = str_replace(Question, "AILessArousing", "AI = Less arousing"),
Question = str_replace(Question, "AIMoreArousing", "AI = More arousing"),
Question = str_replace(Question, "CouldNotDiscriminate", "Hard to discriminate"),
Question = str_replace(Question, "LabelsIncorrect", "Labels were incorrect"),
Question = str_replace(Question, "NoFeels", "Didn't feel anything"),
Question = str_replace(Question, "CouldDiscriminate", "Easy to discriminate"),
Question = str_replace(Question, "LabelsReversed", "Labels were reversed")) |>
mutate(Question = fct_reorder(Question, desc(prop))) |>
ggplot(aes(x = Question, y = prop)) +
geom_bar(stat = "identity") +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks(), labels=scales::percent) +
labs(x="Feedback", y = "Participants", title = "Feedback") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
axis.title.x = element_blank()
)cor <- df |>
select(starts_with("Feedback"), -Feedback_Comments) |>
mutate_all(~ifelse(.=="True", 1, 0)) |>
correlation(method="tetrachoric", redundant = TRUE) |>
correlation::cor_sort() |>
correlation::cor_lower()
cor |>
mutate(val = paste0(insight::format_value(rho), format_p(p, stars_only=TRUE))) |>
mutate(Parameter2 = fct_rev(Parameter2)) |>
mutate(Parameter1 = fct_relabel(Parameter1, \(x) str_remove_all(x, "Feedback_")),
Parameter2 = fct_relabel(Parameter2, \(x) str_remove_all(x, "Feedback_"))) |>
ggplot(aes(x=Parameter1, y=Parameter2)) +
geom_tile(aes(fill = rho), color = "white") +
geom_text(aes(label = val), size = 3) +
labs(title = "Feedback Co-occurence Matrix") +
scale_fill_gradient2(
low = "#2196F3",
mid = "white",
high = "#F44336",
breaks = c(-1, 0, 1),
guide = guide_colourbar(ticks=FALSE),
midpoint = 0,
na.value = "grey85",
limit = c(-1, 1)) +
theme_minimal() +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))outliers <- c(
# "S206" # Collapsed RTs in both phases
# "S399" # Negative Arousal-Valence correlations
"S428", # Only 0s for arousal (creates statistical problems)
"S498", # Only 0s for arousal (creates statistical problems)
"S508" # Only 0s for arousal (creates statistical problems)
)
potentials <- list()df |>
ggplot(aes(x=Mobile, fill=Language)) +
geom_bar() +
geom_hline(yintercept=0.5*nrow(df), linetype="dashed") +
theme_modern()We removed 153 participants that participated with a mobile device.
df <- filter(df, Mobile == "False")
dftask <- filter(dftask, Participant %in% df$Participant)The experiment’s median duration is 24.85 min (50% CI [18.31, 26.17]).
df |>
mutate(Participant = fct_reorder(Participant, Experiment_Duration),
Category = ifelse(Experiment_Duration > 60, "extra", "ok"),
Duration = ifelse(Experiment_Duration > 60, 60, Experiment_Duration),
Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Group, shape = Category)) +
geom_vline(xintercept = median(df$Experiment_Duration), color = "red", linetype = "dashed") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
guides(color = "none", shape = "none") +
ggside::geom_xsidedensity(fill = "#4CAF50", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
labs(
title = "Experiment Completion Time",
x = "Duration (in minutes)",
y = "Participant"
) +
theme_bw() +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .3,
panel.border = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank())potentials$expe_duration <- arrange(df, Experiment_Duration) |>
select(Participant, Experiment_Duration) |>
head(5) plot_hist <- function(dat) {
dens <- rbind(
mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT1),
Phase="Emotional ratings",
y = y / max(y)),
mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT2),
Phase="Reality rating",
y = y / max(y))
)
dat |>
filter(RT1 < 40 & RT2 < 40) |> # Remove very long RTs
# mutate(Participant = fct_reorder(Participant, RT1)) |>
pivot_longer(cols = c(RT1, RT2), names_to = "Phase", values_to = "RT") |>
mutate(Phase = ifelse(Phase == "RT1", "Emotional ratings", "Reality rating")) |>
ggplot(aes(x=RT)) +
geom_area(data=dens, aes(x=x, y=y, fill=Phase), alpha=0.33, position="identity") +
geom_density(aes(color=Phase, y=after_stat(scaled)), linewidth=1.5) +
scale_x_sqrt(breaks=c(0, 2, 5, 10, 20)) +
theme_minimal() +
theme(axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.line.y = element_blank()) +
labs(title = "Distribution of Response Time for each Participant", x="Response time per stimuli (s)") +
facet_wrap(~Participant, ncol=5, scales="free_y") +
coord_cartesian(xlim = c(0, 25))
}plot_hist(dftask[dftask$Participant %in% df$Participant[1:60], ])plot_hist(dftask[dftask$Participant %in% df$Participant[61:120], ])plot_hist(dftask[dftask$Participant %in% df$Participant[121:180], ])plot_hist(dftask[dftask$Participant %in% df$Participant[181:240], ])plot_hist(dftask[dftask$Participant %in% df$Participant[241:264], ])df |>
mutate(Participant = fct_reorder(Participant, BAIT_Duration),
Category = ifelse(BAIT_Duration > 5, "extra", "ok"),
Duration = ifelse(BAIT_Duration > 5, 5, BAIT_Duration),
Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Group, shape = Category)) +
geom_vline(xintercept = median(df$BAIT_Duration), color = "red", linetype = "dashed") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
guides(color = "none", shape = "none") +
ggside::geom_xsidedensity(fill = "#9C27B0", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
labs(
title = "Questionnaire Completion Time",
x = "Duration (in minutes)",
y = "Participant"
) +
theme_bw() +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .3,
panel.border = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank()) dat <- dftask |>
filter(Relevance %in% c("Relevant", "Non-erotic")) |>
group_by(Participant, Type) |>
summarise(Arousal = mean(Arousal),
Valence = mean(Valence),
Enticement = mean(Enticement),
.groups = "drop") |>
pivot_wider(names_from = Type, values_from = all_of(c("Arousal", "Valence", "Enticement"))) |>
transmute(Participant = Participant,
Arousal = Arousal_Erotic - `Arousal_Non-erotic`,
Valence = Valence_Erotic - `Valence_Non-erotic`,
Enticement = Enticement_Erotic - `Enticement_Non-erotic`) |>
filter(!is.na(Arousal)) |>
mutate(Participant = fct_reorder(Participant, Arousal))
dat |>
pivot_longer(-Participant) |>
mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(x=value, y=Participant, fill=Group)) +
geom_bar(aes(fill=value), stat = "identity") +
scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
# scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
facet_wrap(~name, ncol=3, scales="free_x")potentials$emo_diff <- arrange(dat, Arousal) |>
head(5)# dftask[dftask$Participant == "S428", "Arousal"]
# dftask[dftask$Participant == "S498", "Arousal"]
# dftask[dftask$Participant == "S508", "Arousal"]
dat <- dftask |>
filter(!Participant %in% c("S428", "S498", "S508")) |>
summarize(cor_ArVal = cor(Arousal, Valence),
cor_ArEnt = cor(Arousal, Enticement),
.by="Participant")
dat |>
left_join(df[c("Participant", "Language")], by="Participant") |>
mutate(Participant = fct_reorder(Participant, cor_ArVal)) |>
pivot_longer(starts_with("cor_")) |>
mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
mutate(name = fct_relevel(name, "cor_ArVal", "cor_ArEnt"),
name = fct_recode(name, "Arousal - Valence" = "cor_ArVal", "Arousal - Enticement" = "cor_ArEnt")) |>
ggplot(aes(y = Participant, x = value)) +
geom_bar(aes(fill = Language), stat = "identity") +
# scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
# scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
facet_wrap(~name, ncol=3, scales="free_x")potentials$emo_cor <- arrange(dat, cor_ArVal) |>
head(5)c(as.character(potentials$expe_duration$Participant),
as.character(potentials$emo_diff$Participant),
as.character(potentials$emo_cor$Participant)) |>
table()
S203 S256 S297 S385 S393 S403 S416 S422 S429 S430 S446 S456 S466 S540 S541
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
df |>
ggplot(aes(x = Sex)) +
geom_bar(aes(fill = SexualOrientation)) +
scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
scale_fill_metro_d() +
labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Sexual Orientation", fill = "Sexual Orientation") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)We removed 15 participants that were incompatible with further analysis.
df <- filter(df, Sex != "Other" & SexualOrientation != "Other")
dftask <- filter(dftask, Participant %in% df$Participant)show_distribution <- function(dftask, target="Arousal") {
dftask |>
filter(SexualOrientation %in% c("Heterosexual", "Bisexual", "Homosexual")) |>
bayestestR::estimate_density(select=target,
at=c("Relevance", "Category", "Sex", "SexualOrientation"),
method="KernSmooth") |>
ggplot(aes(x = x, y = y)) +
geom_line(aes(color = Relevance, linetype = Category), linewidth=1) +
facet_grid(SexualOrientation~Sex, scales="free_y") +
scale_color_manual(values = c("Relevant" = "red", "Non-erotic" = "blue", "Irrelevant"="darkorange")) +
scale_y_continuous(expand = c(0, 0)) +
scale_x_continuous(expand = c(0, 0)) +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(face="bold")) +
labs(title = target)
}
(show_distribution(dftask, "Arousal") | show_distribution(dftask, "Valence")) /
(show_distribution(dftask, "Enticement") | show_distribution(dftask, "Realness")) +
patchwork::plot_layout(guides = "collect") +
patchwork::plot_annotation(title = "Distribution of Appraisals depending on the Sexual Profile",
theme = theme(plot.title = element_text(hjust = 0.5, face="bold"))) We kept only heterosexual participants (76.29%).
df <- filter(df, SexualOrientation == "Heterosexual")
dftask <- filter(dftask, Participant %in% df$Participant)df <- filter(df, !Participant %in% outliers)
dftask <- filter(dftask, Participant %in% df$Participant)The final sample includes 293 participants (Mean age = 34.0, SD = 13.4, range: [18, 80]; Sex: 32.1% females, 67.9% males, 0.0% other; Education: Bachelor, 34.47%; Doctorate, 7.17%; High School, 30.38%; Master, 25.26%; Other, 2.39%; Primary School, 0.34%; Country: 21.16% Colombia, 18.77% USA, 15.70% UK, 12.63% France, 31.74% other).
p_country <- dplyr::select(df, region = Country) |>
group_by(region) |>
summarize(n = n()) |>
right_join(map_data("world"), by = "region") |>
ggplot(aes(long, lat, group = group)) +
geom_polygon(aes(fill = n)) +
scale_fill_gradientn(colors = c("#FFEB3B", "red", "purple")) +
labs(fill = "N") +
theme_void() +
labs(title = "A Geographically Diverse Sample", subtitle = "Number of participants by country") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2))
)
p_countryggwaffle::waffle_iron(df, ggwaffle::aes_d(group = Ethnicity), rows=10) |>
ggplot(aes(x, y, fill = group)) +
ggwaffle::geom_waffle() +
coord_equal() +
scale_fill_flat_d() +
ggwaffle::theme_waffle() +
labs(title = "Self-reported Ethnicity", subtitle = "Each square represents a participant", fill="") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2)),
axis.title.x = element_blank(),
axis.title.y = element_blank()
)Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
p_age <- estimate_density(df$Age) |>
normalize(select = y) |>
mutate(y = y * 86) |> # To match the binwidth
ggplot(aes(x = x)) +
geom_histogram(data=df, aes(x = Age), fill = "#616161", bins=28) +
# geom_line(aes(y = y), color = "orange", linewidth=2) +
geom_vline(xintercept = mean(df$Age), color = "red", linewidth=1.5) +
# geom_label(data = data.frame(x = mean(df$Age) * 1.15, y = 0.95 * 75), aes(y = y), color = "red", label = paste0("Mean = ", format_value(mean(df$Age)))) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = "Age", y = "Number of Participants", color = NULL, subtitle = "Distribution of participants' age") +
theme_modern(axis.title.space = 10) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_agep_edu <- df |>
mutate(Education = fct_relevel(Education, "Other", "Primary School", "High School", "Bachelor", "Master", "Doctorate")) |>
ggplot(aes(x = Education)) +
geom_bar(aes(fill = Education)) +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_viridis_d(guide = "none") +
labs(title = "Education", y = "Number of Participants", subtitle = "Participants per achieved education level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_educolors <- c(
"NA" = "#2196F3", "None" = "#E91E63", "Condoms (for partner)" = "#9C27B0",
"Combined pills" = "#FF9800", "Intrauterine Device (IUD)" = "#FF5722",
"Intrauterine System (IUS)" = "#795548", "Progestogen pills" = "#4CAF50",
"Other" = "#FFC107", "Condoms (female)" = "#607D8B"
)
colors <- colors[names(colors) %in% c("NA", df$BirthControl)]
p_sex <- df |>
mutate(BirthControl = ifelse(Sex == "Male", "NA", BirthControl),
BirthControl = fct_relevel(BirthControl, names(colors))) |>
ggplot(aes(x = Sex)) +
geom_bar(aes(fill = BirthControl)) +
scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
scale_fill_manual(
values = colors,
breaks = names(colors)[2:length(colors)]
) +
labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Birth Control Method", fill = "Birth Control") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_sexp_sexprofile <- df |>
select(Participant, Sex, SexualOrientation, SexualActivity, COPS_Duration_1, COPS_Frequency_2) |>
pivot_longer(-all_of(c("Participant", "Sex"))) |>
mutate(name = str_replace_all(name, "SexualOrientation", "Sexual Orientation"),
name = str_replace_all(name, "SexualActivity", "Sexual Activity"),
name = str_replace_all(name, "COPS_Duration_1", "Pornography Usage (Duration)"),
name = str_replace_all(name, "COPS_Frequency_2", "Pornography Usage (Frequency)")) |>
ggplot(aes(x = value, fill=Sex)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_manual(values = c("Male"= "#64B5F6", "Female"= "#F06292")) +
labs(title = "Sexual Profile of Participants") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
axis.title.x = element_blank(),
axis.title.y = element_blank()
) +
facet_wrap(~name, scales = "free")
p_sexprofilep_language <- df |>
ggplot(aes(x = Language_Level)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = "Level", y = "Number of Participants", title = "Language Level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
p_expertise <- df |>
ggplot(aes(x = AI_Knowledge)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = "Level", y = "Number of Participants", title = "AI-Expertise") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
p_language | p_expertisep_country /
(p_age + p_edu)This section pertains to the validation of the BAIT scale measuring beliefs and expectations about artificial creations.
bait <- df |>
select(starts_with("BAIT_"), -BAIT_Duration) |>
rename_with(function(x) gsub("BAIT_\\d_", "", x))
cor <- correlation::correlation(bait, redundant = TRUE) |>
correlation::cor_sort() |>
correlation::cor_lower()
clean_labels <- function(x) {
x <- str_remove_all(x, "BAIT_") |>
str_replace_all("_", " - ")
}
cor |>
mutate(val = paste0(insight::format_value(r), format_p(p, stars_only=TRUE))) |>
mutate(Parameter2 = fct_rev(Parameter2)) |>
mutate(Parameter1 = fct_relabel(Parameter1, clean_labels),
Parameter2 = fct_relabel(Parameter2, clean_labels)) |>
ggplot(aes(x=Parameter1, y=Parameter2)) +
geom_tile(aes(fill = r), color = "white") +
geom_text(aes(label = val), size = 3) +
labs(title = "Correlation Matrix",
subtitle = "Beliefs about Artificial Information Technology (BAIT)") +
scale_fill_gradient2(
low = "#2196F3",
mid = "white",
high = "#F44336",
breaks = c(-1, 0, 1),
guide = guide_colourbar(ticks=FALSE),
midpoint = 0,
na.value = "grey85",
limit = c(-1, 1)) +
theme_minimal() +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))n <- parameters::n_factors(bait, package = "nFactors")
plot(n)efa <- parameters::factor_analysis(bait, cor=cor(bait), n=3, rotation = "oblimin",
sort=TRUE, scores="tenBerge", fm="ml")
plot(efa)display(efa)| Variable | ML3 | ML1 | ML2 | Complexity | Uniqueness |
|---|---|---|---|---|---|
| EnvironmentReal | 0.66 | 0.03 | -0.05 | 1.01 | 0.55 |
| VideosIssues | 0.56 | -0.22 | 0.21 | 1.63 | 0.60 |
| ImagesRealistic | 0.52 | 0.06 | -0.18 | 1.27 | 0.65 |
| ImitatingReality | 0.49 | 0.03 | -0.18 | 1.28 | 0.68 |
| VideosRealistic | 8.59e-05 | 0.99 | 0.02 | 1.00 | 5.00e-03 |
| TextRealistic | 0.15 | -0.03 | -0.67 | 1.11 | 0.45 |
| TextIssues | 0.08 | 0.06 | 0.64 | 1.05 | 0.60 |
| ImagesIssues | 0.02 | 0.26 | 0.28 | 2.01 | 0.84 |
The 3 latent factors (oblimin rotation) accounted for 45.10% of the total variance of the original data (ML3 = 16.89%, ML1 = 14.29%, ML2 = 13.92%).
m1 <- lavaan::cfa(
"G =~ ImitatingReality + EnvironmentReal + VideosIssues + TextIssues + VideosRealistic + ImagesRealistic + ImagesIssues + TextRealistic",
data=bait)
m2 <- lavaan::cfa(
"Images =~ ImitatingReality + EnvironmentReal + ImagesRealistic + ImagesIssues + VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
m3 <- lavaan::cfa(
"Images =~ ImitatingReality + EnvironmentReal + ImagesRealistic + ImagesIssues
Videos =~ VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
m4 <- lavaan::cfa(
"Environment =~ ImitatingReality + EnvironmentReal
Images =~ ImagesRealistic + ImagesIssues
Videos =~ VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
m5 <- lavaan::cfa(efa_to_cfa(efa, threshold="max"), data=bait)
# bayestestR::bayesfactor_models(m1, m2)
lavaan::anova(m1, m2, m3, m4, m5)Warning in lavTestLRT(object = object, ..., model.names = NAMES): lavaan WARNING:
Some restricted models fit better than less restricted models;
either these models are not nested, or the less restricted model
failed to reach a global optimum. Smallest difference =
-4.36016981458226
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
m4 14 -96.253 -15.289 58.784
m3 17 -89.518 -19.594 71.519 12.735 0.10524 3 0.005246 **
m5 18 -61.411 4.832 101.626 30.106 0.31518 1 4.090e-08 ***
m2 19 -67.772 -5.209 97.266 -4.360 0.00000 1 1.000000
m1 20 -22.398 36.485 144.639 47.373 0.39783 1 5.867e-12 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
display(parameters::parameters(m3, standardize = TRUE))| Link | Coefficient | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| Images =~ ImitatingReality | 0.58 | 0.05 | (0.47, 0.68) | 10.91 | < .001 |
| Images =~ EnvironmentReal | 0.61 | 0.05 | (0.51, 0.71) | 11.85 | < .001 |
| Images =~ ImagesRealistic | 0.62 | 0.05 | (0.52, 0.72) | 12.24 | < .001 |
| Images =~ ImagesIssues | -0.28 | 0.06 | (-0.40, -0.15) | -4.28 | < .001 |
| Videos =~ VideosIssues | 0.77 | 0.09 | (0.60, 0.95) | 8.72 | < .001 |
| Videos =~ VideosRealistic | -0.48 | 0.07 | (-0.62, -0.35) | -6.95 | < .001 |
| Text =~ TextIssues | 0.49 | 0.07 | (0.35, 0.63) | 6.91 | < .001 |
| Text =~ TextRealistic | -0.95 | 0.11 | (-1.16, -0.74) | -8.87 | < .001 |
| Link | Coefficient | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| Images ~~ Videos | 0.61 | 0.09 | (0.44, 0.78) | 7.11 | < .001 |
| Images ~~ Text | -0.53 | 0.08 | (-0.70, -0.37) | -6.41 | < .001 |
| Videos ~~ Text | -0.16 | 0.08 | (-0.31, -6.32e-03) | -2.04 | 0.041 |
Exploratory Graph Analysis (EGA) is a recently developed framework for psychometric assessment, that can be used to estimate the number of dimensions in questionnaire data using network estimation methods and community detection algorithms, and assess the stability of dimensions and items using bootstrapping.
Unique Variable Analysis (Christensen, Garrido, & Golino, 2023) uses the weighted topological overlap measure (Nowick et al., 2009) on an estimated network. Values greater than 0.25 are determined to have considerable local dependence (i.e., redundancy) that should be handled (variables with the highest maximum weighted topological overlap to all other variables (other than the one it is redundant with) should be removed).
uva <- EGAnet::UVA(data = bait, cut.off = 0.3)
uvaVariable pairs with wTO > 0.30 (large-to-very large redundancy)
node_i node_j wto
TextRealistic TextIssues 0.343
----
Variable pairs with wTO > 0.25 (moderate-to-large redundancy)
----
Variable pairs with wTO > 0.20 (small-to-moderate redundancy)
node_i node_j wto
VideosRealistic VideosIssues 0.213
uva$keep_remove$keep
[1] "TextIssues"
$remove
[1] "TextRealistic"
ega <- list()
for(model in c("glasso", "TMFG")) {
for(algo in c("walktrap", "louvain")) {
for(type in c("ega", "ega.fit", "riEGA")) { # "hierega"
if(type=="ega.fit" & algo=="louvain") next # Too slow
ega[[paste0(model, "_", algo, "_", type)]] <- EGAnet::bootEGA(
data = bait,
seed=123,
model=model,
algorithm=algo,
EGA.type=type,
type="resampling",
plot.itemStability=FALSE,
verbose=FALSE)
}
}
}The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
EGAnet::compare.EGA.plots(
ega$glasso_walktrap_ega, ega$glasso_walktrap_ega.fit,
ega$glasso_louvain_ega, ega$TMFG_louvain_ega,
ega$glasso_louvain_riEGA, ega$glasso_walktrap_riEGA,
ega$TMFG_walktrap_ega, ega$TMFG_walktrap_ega.fit,
ega$TMFG_louvain_riEGA, ega$TMFG_walktrap_riEGA,
labels=c("glasso_walktrap_ega", "glasso_walktrap_ega.fit",
"glasso_louvain_ega", "TMFG_louvain_ega",
"glasso_louvain_riEGA", "glasso_walktrap_riEGA",
"TMFG_walktrap_ega", "TMFG_walktrap_ega.fit",
"TMFG_louvain_riEGA", "TMFG_walktrap_riEGA"),
rows=5,
plot.all = FALSE)$allFigures shows how often each variable is replicating in their empirical structure across bootstraps.
patchwork::wrap_plots(lapply(ega, plot), nrow = 4)ega_final <- ega$glasso_walktrap_riEGA$EGA
plot(ega_final)ega_scores <- EGAnet::net.scores(bait, ega_final)$scores$std.scores |>
as.data.frame() |>
setNames(c("EGA_Text", "EGA_Image", "EGA_Videos")) # Merge with data
scores <- lavaan::predict(m3) |>
as.data.frame() |>
datawizard::data_addprefix("CFA_") |>
# data_rename(c("ML1", "ML2"), c("BAIT_SEM1", "BAIT_SEM2")) |>
cbind(ega_scores) |>
mutate(Participant = df$Participant)
scores$BAIT_Videos <- (bait$VideosRealistic + (1 - bait$VideosIssues)) / 2
scores$BAIT_Images <- (bait$ImagesRealistic + (1 - bait$ImagesIssues) + bait$ImitatingReality + bait$EnvironmentReal) / 4
scores$BAIT_Text <- (bait$TextRealistic + (1 - bait$TextIssues)) / 2
df <- full_join(df, scores, by="Participant")We computed two type of general scores for the BAIT scale, an empirical score based on the average of observed data (of the most loading items) and a model-based score as predicted by the structural model. The first one gives equal weight to all items (and keeps the same [0-1] range), while the second one is based on the factor loadings and the covariance structure.
correlation::cor_test(scores, "BAIT_Images", "CFA_Images") |>
plot() +
ggside::geom_xsidedensity(aes(x=BAIT_Images), color="grey", linewidth=1) +
ggside::geom_ysidedensity(aes(y=CFA_Images), color="grey", linewidth=1) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
ggside::scale_ysidex_continuous(expand = c(0, 0)) +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .1) While the two correlate substantially, they have different benefits. The empirical score has a more straightforward meaning and is more reproducible (as it is not based on a model fitted on a specific sample), the model-based score takes into account the relative importance of the contribution of each item to their factor.
table <- correlation::correlation(scores) |>
summary()
format(table) |>
datawizard::data_rename("Parameter", "Variables") |>
gt::gt() |>
gt::cols_align(align="center") |>
gt::tab_options(column_labels.font.weight="bold")| Variables | BAIT_Text | BAIT_Images | BAIT_Videos | EGA_Videos | EGA_Image | EGA_Text | CFA_Text | CFA_Videos |
|---|---|---|---|---|---|---|---|---|
| CFA_Images | 0.52*** | 0.91*** | -0.56*** | 0.14 | 0.92*** | 0.29*** | -0.63*** | 0.74*** |
| CFA_Videos | 0.15 | 0.56*** | -0.92*** | 0.31*** | 0.56*** | 0.11 | -0.21** | |
| CFA_Text | -0.89*** | -0.45*** | 0.17 | 0.08 | -0.45*** | -0.47*** | ||
| EGA_Text | 3.94e-03 | 0.16 | -0.08 | 0.02 | 0.17 | |||
| EGA_Image | 0.37*** | 0.98*** | -0.37*** | 0.05 | ||||
| EGA_Videos | -0.11 | 0.02 | 2.62e-03 | |||||
| BAIT_Videos | -0.14 | -0.38*** | ||||||
| BAIT_Images | 0.37*** |
table <- correlation::correlation(
select(scores, starts_with("BAIT_")),
select(df, starts_with("GAAIS")),
bayesian=TRUE) |>
summary()
format(table) |>
datawizard::data_rename("Parameter", "Variables") |>
gt::gt() |>
gt::cols_align(align="center") |>
gt::tab_options(column_labels.font.weight="bold")| Variables | GAAIS_Negative_9 | GAAIS_Positive_17 | GAAIS_Positive_12 | GAAIS_Negative_15 | GAAIS_Positive_7 | GAAIS_Negative_10 |
|---|---|---|---|---|---|---|
| BAIT_Videos | -0.11* | 0.02 | 0.11 | -0.21*** | 0.16** | -0.15** |
| BAIT_Images | 0.16** | 0.22*** | 0.24*** | 0.03 | 0.14** | -0.03 |
| BAIT_Text | 0.07 | 0.21*** | 0.15** | -0.12* | 0.24*** | -0.16** |
df |>
ggplot(aes(x=as.factor(AI_Knowledge), y=BAIT_Images)) +
geom_boxplot()# m <- betareg::betareg(BAIT ~ AI_Knowledge, data=df)
m <- lm(BAIT_Images ~ poly(AI_Knowledge, 2), data=df)
# m <- brms::brm(BAIT ~ mo(AI_Knowledge), data=df, algorithm = "meanfield")
# m <- brms::brm(BAIT ~ AI_Knowledge, data=dfsub, algorithm = "meanfield")
display(parameters::parameters(m))| Parameter | Coefficient | SE | 95% CI | t(290) | p |
|---|---|---|---|---|---|
| (Intercept) | 0.69 | 9.22e-03 | (0.67, 0.71) | 74.89 | < .001 |
| AI Knowledge (1st degree) | -0.10 | 0.16 | (-0.41, 0.21) | -0.61 | 0.542 |
| AI Knowledge (2nd degree) | 0.41 | 0.16 | (0.10, 0.72) | 2.59 | 0.010 |
marginaleffects::predictions(m, by=c("AI_Knowledge"), newdata = "marginalmeans") |>
as.data.frame() |>
ggplot(aes(x=AI_Knowledge, y=estimate)) +
geom_jitter2(data=df, aes(y=BAIT_Images), alpha=0.2, width=0.1) +
geom_line(aes(group=1), position = position_dodge(width=0.2)) +
geom_pointrange(aes(ymin = conf.low, ymax=conf.high), position = position_dodge(width=0.2)) +
theme_minimal() +
labs(x = "AI-Knowledge", y="BAIT Score")# m <- betareg::betareg(BAIT ~ Sex / Age, data=df, na.action=na.omit)
m <- lm(BAIT_Images ~ Sex / Age, data=df)
display(parameters::parameters(m))| Parameter | Coefficient | SE | 95% CI | t(289) | p |
|---|---|---|---|---|---|
| (Intercept) | 0.65 | 0.04 | (0.57, 0.74) | 14.77 | < .001 |
| Sex (Male) | 0.05 | 0.06 | (-0.06, 0.16) | 0.95 | 0.342 |
| Sex (Female) × Age | 2.42e-03 | 1.53e-03 | (-5.94e-04, 5.43e-03) | 1.58 | 0.115 |
| Sex (Male) × Age | -7.14e-04 | 8.46e-04 | (-2.38e-03, 9.51e-04) | -0.84 | 0.399 |
glm(Feedback_LabelsIncorrect ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_LabelsIncorrect = ifelse(Feedback_LabelsIncorrect=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Labels are Incorrect'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -0.33 | 1.73 | (-3.72, 3.09) | -0.19 | 0.849 |
| BAIT Images | -1.06 | 2.37 | (-5.81, 3.53) | -0.45 | 0.656 |
| AI Knowledge | 0.29 | 0.47 | (-0.64, 1.23) | 0.61 | 0.539 |
| BAIT Images × AI Knowledge | -0.13 | 0.65 | (-1.39, 1.16) | -0.20 | 0.843 |
glm(Feedback_LabelsReversed ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_LabelsReversed = ifelse(Feedback_LabelsReversed=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Labels are reversed'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -0.12 | 2.85 | (-5.75, 5.44) | -0.04 | 0.966 |
| BAIT Images | -2.81 | 4.01 | (-10.93, 4.76) | -0.70 | 0.484 |
| AI Knowledge | -0.45 | 0.81 | (-2.04, 1.13) | -0.55 | 0.581 |
| BAIT Images × AI Knowledge | 0.49 | 1.13 | (-1.68, 2.72) | 0.43 | 0.666 |
glm(Feedback_CouldDiscriminate ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_CouldDiscriminate = ifelse(Feedback_CouldDiscriminate=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Easy to discriminate'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -0.81 | 3.05 | (-7.01, 5.01) | -0.26 | 0.791 |
| BAIT Images | -1.85 | 4.06 | (-9.89, 6.05) | -0.46 | 0.648 |
| AI Knowledge | -0.80 | 0.88 | (-2.52, 0.94) | -0.90 | 0.367 |
| BAIT Images × AI Knowledge | 0.94 | 1.15 | (-1.31, 3.19) | 0.82 | 0.414 |
glm(Feedback_CouldNotDiscriminate ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_CouldNotDiscriminate = ifelse(Feedback_CouldNotDiscriminate=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Hard to discriminate'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -3.04 | 1.84 | (-6.78, 0.49) | -1.65 | 0.100 |
| BAIT Images | 4.68 | 2.50 | (-0.07, 9.81) | 1.87 | 0.061 |
| AI Knowledge | -0.01 | 0.51 | (-1.01, 1.00) | -0.02 | 0.982 |
| BAIT Images × AI Knowledge | -0.18 | 0.69 | (-1.56, 1.16) | -0.26 | 0.795 |
glm(Feedback_Fun ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_Fun = ifelse(Feedback_Fun=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Fun'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | 7.43e-03 | 1.68 | (-3.32, 3.31) | 4.42e-03 | 0.996 |
| BAIT Images | 0.75 | 2.29 | (-3.71, 5.32) | 0.33 | 0.742 |
| AI Knowledge | -0.13 | 0.47 | (-1.05, 0.79) | -0.28 | 0.777 |
| BAIT Images × AI Knowledge | 0.16 | 0.63 | (-1.09, 1.41) | 0.26 | 0.798 |
glm(Feedback_Boring ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_Boring = ifelse(Feedback_Boring=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Boring'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -0.18 | 2.06 | (-4.21, 3.89) | -0.09 | 0.932 |
| BAIT Images | -2.18 | 2.90 | (-8.04, 3.36) | -0.75 | 0.452 |
| AI Knowledge | 0.01 | 0.56 | (-1.08, 1.11) | 0.03 | 0.979 |
| BAIT Images × AI Knowledge | 0.14 | 0.78 | (-1.38, 1.69) | 0.18 | 0.859 |
write.csv(df, "../data/data_participants.csv", row.names = FALSE)
write.csv(dftask, "../data/data.csv", row.names = FALSE)
Comments
Code